Code
colnames_to_underscores <- function(data = NULL) {
dat <- data
names(dat) <- stringr::str_replace_all(names(dat), pattern = " ", replacement = "_")
return(dat)
}Our analysis will evaluate the pilot outcomes for the Perceived Social Categorization Study. Participants rated the same set of 85 photos for perceived “Jewishness” and “Arabness.” Our objective is to pinpoint images where the average ratings across these two dimensions do not significantly diverge. This nuanced approach enables us to select images that best represent a balanced perception, laying a robust foundation for our main study and ensuring the integrity and relevance of our visual stimuli.
Utilize the provided helper function colnames_to_underscores to standardize column names by replacing spaces and special characters with underscores.
colnames_to_underscores <- function(data = NULL) {
dat <- data
names(dat) <- stringr::str_replace_all(names(dat), pattern = " ", replacement = "_")
return(dat)
}get_summary_stats <- function(demo_wide_clean) {
summary_stats <- summary(demo_wide_clean)
return(summary_stats)
}Import data sets related to the main task, attention checks, and demographics.
data_categorization_jwish_first <- read_csv("../Data/data_exp_143127-v17_task-jwishfirstrealdata.csv", show_col_types = FALSE)
data_categorization_Arab_first <- read_csv("../Data/data_exp_143127-v17_task-4qo7Arabfirstrealdata.csv", show_col_types = FALSE)
att_check <- read_csv("../Data/data_exp_143127-v17_task-sfst_ATTcheck.csv", show_col_types = F)
data_demo <- read_csv("../Data/data_exp_143127-v17_questionnaire-jj6n_demo_all_long_for.csv", show_col_types = F)att_check <- att_check|>
colnames_to_underscores() |>
dplyr::filter(str_detect(Zone_Type, pattern = "endValue")) |>
select(Participant_Private_ID, Response) |>
mutate(Participant_Private_ID = factor(Participant_Private_ID))Identifying who failed the attention checks (answer > 5)
failed_IDs <- att_check |>
dplyr::filter(Response > 5) |>
select(Participant_Private_ID)data_participants <- rbind(data_categorization_Arab_first,data_categorization_jwish_first)|>
colnames_to_underscores() |>
dplyr::filter(!(Participant_Private_ID %in% failed_IDs$Participant_Private_ID)) |>
dplyr::filter(display %in% c("task_Jewish", "task_Arab")) |> # removing instructions screens
dplyr::filter(Zone_Type == "response_slider_endValue") |> # only subjects answers
select(Participant_Private_ID, Response, image, Reaction_Time, display, Task_Name) |>
mutate(Participant_Private_ID = factor(Participant_Private_ID),
image = factor(image),
Task_Name = factor(Task_Name),
display = factor(display)) |>
mutate(Task_Name = case_when(
Task_Name == "Group_categorization_JewishFirst_pilot2" ~ "JewishFirst",
Task_Name == "Group_categorization_ArabFirst_pilot2" ~ "ArabFirst",
TRUE ~ Task_Name
)) |>
rename(order_of_conditions = Task_Name)
num_participants <- n_distinct(data_participants$Participant_Private_ID)Density plots for participants’ responses based on the dysplay conditions, labeled as “task Arab” versus “task Jewish.”
density_plot2 <- ggplot(data_participants, aes(x = Response, fill = display)) +
geom_density(alpha = 0.5) + # Plot density
geom_rug(aes(color = display), sides = "b") + # Add rug plot at the bottom
scale_fill_brewer(palette = "Pastel1") + # Use Pastel1 palette for fill
scale_color_brewer(palette = "Pastel1") + # Use Pastel1 palette for rug and mean line colors
theme_minimal() +
labs(title = "Density of Ratings by Display", x = "Rating", y = "Density") +
geom_vline(data = data_participants %>% group_by(display) |>
summarise(mean_response = mean(Response, na.rm = TRUE)),
aes(xintercept = mean_response),
linetype = "dashed", color = "black", size = 0.5)
print(density_plot2)ggsave("density_plot_with_all.png", density_plot2, path = "../Plots/", width = 10, height = 8, units = "in", bg = "white")Identify and exclude outliers from our data set using The MAD-median rule for outlier removal as recommended by Bakker and Wicherts (2014).
#Threshold Determiantion
threshold <- 2.24
# Calculate the median and MAD for the Response column
median_response <- median(data_participants$Response, na.rm = TRUE)
mad_response <- mad(data_participants$Response, constant = 1, na.rm = TRUE)
lower_bound <- median_response - threshold * mad_response
upper_bound <- median_response + threshold * mad_response
outlier_indices <- which(data_participants$Response < lower_bound | data_participants$Response > upper_bound)
data_participants$is_outlier <- ifelse(data_participants$Response < lower_bound | data_participants$Response > upper_bound, 1, 0)
outliers <- data_participants[outlier_indices, ]
data_cleaned <- data_participants[!data_participants$Response %in% outliers$Response, ]
set.seed(14)
# test <- data_participants |>
# #mutate(is_outlier = sample(x = c(0, 1), size = nrow(data_participants), replace = T, prob = c(.8, .2))) |>
# filter(is_outlier == 1) |>
# group_by(Participant_Private_ID) |>
# mutate(n_trials = n()) |>
# mutate(bad_trials = 85*2 - n_trials) |>
# mutate(percent_bad_trials = n_bad_trials / (85*2))
# #filter(percent_bad_trials <= 0.2)
# library(dplyr)
test1 <- data_participants |>
filter(is_outlier == 1) |>
group_by(Participant_Private_ID) |>
mutate(bad_trials = n(),
#bad_trials = 85 * 2 - n_trials,
percent_bad_trials = bad_trials / (85 * 2))
num_participants_out <- n_distinct(test1$Participant_Private_ID)A table showing the average standard deviation of each subject’s ratings beyond display types
participant_sd_ratings <- data_participants |>
group_by(Participant_Private_ID) |>
summarise(SD_of_Ratings = sd(Response, na.rm = TRUE)) |>
ungroup()
kable(participant_sd_ratings, caption = "Standard Deviation of Ratings for Each Participant")| Participant_Private_ID | SD_of_Ratings |
|---|---|
| 10514858 | 19.72960 |
| 10515072 | 41.73836 |
| 10515173 | 23.39740 |
| 10515193 | 31.32064 |
| 10515201 | 36.83332 |
| 10515243 | 40.58422 |
| 10515319 | 35.68966 |
| 10515327 | 32.17920 |
| 10515904 | 47.55128 |
| 10515918 | 22.24714 |
| 10515942 | 10.57305 |
| 10515960 | 19.62875 |
| 10515998 | 35.48673 |
| 10516258 | 24.88498 |
| 10516270 | 25.19615 |
| 10516302 | 29.10667 |
| 10516415 | 32.04238 |
| 10516645 | 22.79964 |
| 10516756 | 20.96220 |
| 10517522 | 29.27106 |
| 10517752 | 37.97194 |
| 10517925 | 40.37224 |
| 10518377 | 24.35308 |
| 10519057 | 37.29496 |
| 10519217 | 11.59149 |
| 10519319 | 29.61986 |
| 10519805 | 32.12981 |
| 10520066 | 21.90053 |
| 10520207 | 32.73827 |
| 10520244 | 25.67647 |
| 10520416 | 28.98994 |
| 10520443 | 26.83289 |
| 10520649 | 31.50339 |
| 10520738 | 28.81733 |
| 10522475 | 29.30831 |
| 10522511 | 18.49479 |
| 10522561 | 27.52098 |
| 10527960 | 31.49570 |
| 10528079 | 22.82760 |
| 10528402 | 12.17067 |
| 10528576 | 30.03514 |
| 10529232 | 36.92276 |
| 10530076 | 37.41983 |
| 10530576 | 20.11152 |
| 10530858 | 29.53245 |
| 10531834 | 23.79220 |
Examining how the order of conditions affects ratings of images as “Arab” or “Jewish,” to ensure there is no influence of presentation sequence on perceptions. Visualization of order effect
# Visualization of order effects
order_effect_plot <- ggplot(data_participants, aes(x = order_of_conditions, y = Response, fill = display)) +
geom_boxplot() +
stat_summary(fun = mean, geom = "errorbar", aes(ymax = ..y.., ymin = ..y..), width = 0.75, color = "red") +
facet_wrap(~display, scales = "free") +
labs(title = "Order Effect on Ratings",
x = "Order of Conditions",
y = "Rating") +
theme_minimal() +
theme(plot.background = element_rect(fill = "white"), # Set plot background to white
panel.background = element_rect(fill = "white"), # Ensure panel background is white
text = element_text(color = "black")) + # Ensure text is black
scale_fill_brewer(palette = "Pastel1")
order_effect_plotggsave("order_effect_with_all.png", order_effect_plot, path = "../Plots/", width = 4000, height = 4000, units = "px")Perform a t-test to see if there’s a significant difference in ratings between orders
mean_ratings_by_order <- data_participants |>
group_by(order_of_conditions, display) |>
summarise(mean_rating = mean(Response, na.rm = TRUE), .groups = 'drop') |>
pivot_wider(names_from = display, values_from = mean_rating)
t_test_result_jewish <- t.test(Response ~ order_of_conditions,
data = dplyr::filter(data_participants, display == "task_Jewish"),
alternative = "two.sided")
t_test_result_arab <- t.test(Response ~ order_of_conditions,
data = dplyr::filter(data_participants, display == "task_Arab"),
alternative = "two.sided")t_test_results <- data.frame(
Display = c("Jewish", "Arab"),
Statistic = c(t_test_result_jewish$statistic, t_test_result_arab$statistic),
P_Value = c(t_test_result_jewish$p.value, t_test_result_arab$p.value) # Difference of means, NA for the second row
)
# Create a table from the results
kable(t_test_results, caption = "T-Test Results for Jewish and Arab Displays", format = "markdown")| Display | Statistic | P_Value |
|---|---|---|
| Jewish | 1.9062385 | 0.0566926 |
| Arab | -0.7360867 | 0.4617223 |
markdown_table <- kable(t_test_results, caption = "T-Test Results for Jewish and Arab Displays", format = "markdown")
writeLines(markdown_table, "t_test_results.md")data_images_10<- data_participants |>
group_by(image, Participant_Private_ID) |>
#dplyr::filter(!(Participant_Private_ID %in% exclude_participant_IDs))|>
dplyr::summarize(
task_Jewish = mean(Response[display == "task_Jewish"], na.rm = TRUE),
task_Arab = mean(Response[display == "task_Arab"], na.rm = TRUE),
.groups = 'drop' ) |> # Calculate the difference in ratings for each participant and image
mutate(diff_per_participant = task_Jewish - task_Arab) |>
# Aggregate at the image level
group_by(image) |>
dplyr::summarize(
avg_diff = mean(diff_per_participant, na.rm = TRUE),
.groups = 'drop') |># Classify based on the average difference
mutate(
rated_ethnicity = case_when(
avg_diff < -10 ~ "Arab",
avg_diff > 10 ~ "Jewish",
TRUE ~ "Ambiguous"
)
)|>
mutate(avg_diff = abs(avg_diff)) |>
arrange(avg_diff)
data_images_big_diff_10 <- data_images_10 |>
dplyr::filter(abs(avg_diff) >= 10) |>
mutate(rated_ethnicity = case_when(avg_diff < 0 ~ "Arab",
avg_diff > 0 ~ "Jewish",
.default = NA))
data_images_choosen_10 <- data_images_10 |>
dplyr::filter(abs(avg_diff)<10)|>
mutate(rated_ethnicity = case_when(avg_diff < 0 ~ "Arab",
avg_diff > 0 ~ "Jewish",
.default = NA))
kable(data_images_10, caption = "Difference of means with a cutoff of 10 points")| image | avg_diff | rated_ethnicity |
|---|---|---|
| CFD_M-212-N.png | 0.0000000 | Ambiguous |
| CFD_M-242-N.png | 0.0217391 | Ambiguous |
| CFD_M-211-N.png | 0.0434783 | Ambiguous |
| CFD_M-234-N.png | 0.7173913 | Ambiguous |
| IFD_M-086-N.png | 0.8913043 | Ambiguous |
| CFD_M-227-N.png | 1.3260870 | Ambiguous |
| IFD_M-018-N.png | 1.3260870 | Ambiguous |
| CFD_M-214-N.png | 2.1304348 | Ambiguous |
| CFD_M-236-N.png | 2.2391304 | Ambiguous |
| IFD_M-105-N.png | 2.5869565 | Ambiguous |
| CFD_M-206-N.png | 3.6304348 | Ambiguous |
| CFD_M-218-N.png | 4.2391304 | Ambiguous |
| CFD_M-220-N.png | 4.3695652 | Ambiguous |
| CFD_M-253-N.png | 4.7391304 | Ambiguous |
| CFD_M-248-N.png | 5.1956522 | Ambiguous |
| IFD_M-421-N.png | 5.2826087 | Ambiguous |
| IFD_M-132-N.png | 5.6086957 | Ambiguous |
| IFD_M-419-N.png | 6.2608696 | Ambiguous |
| CFD_M-237-N.png | 6.4130435 | Ambiguous |
| IFD_M-135-N.png | 8.8043478 | Ambiguous |
| IFD_M-108-N.png | 9.0000000 | Ambiguous |
| CFD_M-216-N.png | 9.3695652 | Ambiguous |
| IFD_M-036-N.png | 9.4565217 | Ambiguous |
| IFD_M-416-N.png | 9.9130435 | Ambiguous |
| CFD_M-224-N.png | 9.9347826 | Ambiguous |
| CFD_M-243-N.png | 10.0217391 | Arab |
| CFD_M-247-N.png | 10.7608696 | Arab |
| IFD_M-117-N.png | 10.8043478 | Jewish |
| IFD_M-122-N.png | 11.0217391 | Jewish |
| CFD_M-229-N.png | 11.1086957 | Arab |
| IFD_M-067-N.png | 11.1521739 | Arab |
| CFD_M-231-N.png | 11.3478261 | Jewish |
| IFD_M-100-N.png | 11.3913043 | Jewish |
| CFD_M-225-N.png | 11.5217391 | Arab |
| IFD_M-121-N.png | 11.9347826 | Jewish |
| CFD_M-204-N.png | 13.4130435 | Jewish |
| CFD_M-222-N.png | 14.3043478 | Jewish |
| IFD_M-418-N.png | 14.4130435 | Jewish |
| IFD_M-033-N.png | 14.5652174 | Jewish |
| IFD_M-424-N.png | 15.0000000 | Arab |
| IFD_M-441-N.png | 15.4565217 | Jewish |
| CFD_M-251-N.png | 15.5652174 | Jewish |
| IFD_M-136-N.png | 15.6739130 | Arab |
| IFD_M-021-N.png | 15.8478261 | Jewish |
| CFD_M-221-N.png | 16.6956522 | Jewish |
| CFD_M-213-N.png | 17.2391304 | Arab |
| CFD_M-230-N.png | 17.5434783 | Arab |
| CFD_M-200-N.png | 17.9782609 | Jewish |
| IFD_M-015-N.png | 18.3260870 | Jewish |
| IFD_M-075-N.png | 19.5652174 | Arab |
| IFD_M-113-N.png | 19.5652174 | Jewish |
| IFD_M-062-N.png | 19.5869565 | Arab |
| IFD_M-420-N.png | 19.6086957 | Arab |
| CFD_M-246-N.png | 19.8478261 | Arab |
| CFD_M-223-N.png | 19.9130435 | Arab |
| IFD_M-044-N.png | 20.5217391 | Arab |
| IFD_M-087-N.png | 21.6304348 | Jewish |
| CFD_M-252-N.png | 22.3260870 | Arab |
| IFD_M-051-N.png | 23.0217391 | Arab |
| IFD_M-042-N.png | 23.1086957 | Arab |
| IFD_M-035-N.png | 23.4130435 | Jewish |
| CFD_M-239-N.png | 24.5000000 | Arab |
| CFD_M-238-N.png | 25.4565217 | Arab |
| CFD_M-210-N.png | 26.6739130 | Arab |
| IFD_M-017-N.png | 27.1956522 | Jewish |
| IFD_M-111-N.png | 28.5000000 | Arab |
| CFD_M-232-N.png | 28.6739130 | Arab |
| IFD_M-114-N.png | 29.5652174 | Jewish |
| IFD_M-032-N.png | 29.6304348 | Arab |
| IFD_M-097-N.png | 29.7826087 | Arab |
| IFD_M-084-N.png | 29.8478261 | Arab |
| IFD_M-020-N.png | 31.1521739 | Arab |
| CFD_M-250-N.png | 31.6304348 | Jewish |
| CFD_M-235-N.png | 32.0217391 | Arab |
| IFD_M-049-N.png | 33.4347826 | Arab |
| IFD_M-028-N.png | 36.5217391 | Jewish |
| CFD_M-201-N.png | 39.1086957 | Jewish |
| IFD_M-069-N.png | 41.6956522 | Arab |
| IFD_M-423-N.png | 44.1086957 | Arab |
| IFD_M-107-N.png | 49.4130435 | Arab |
| CFD_M-202-N.png | 50.2608696 | Arab |
| IFD_M-066-N.png | 60.5000000 | Arab |
| IFD_M-039-N.png | 61.4565217 | Arab |
| IFD_M-045-N.png | 61.7391304 | Arab |
| IFD_M-046-N.png | 70.9347826 | Arab |
saveRDS(data_images_10, file = "C:/Users/97252/Documents/GitHub/face_cater/PILOT- 1 face categorization/Data/final_data.rdx")Summary Table:
summary_table <- data_images_10 |>
count(rated_ethnicity) |>
spread(key = rated_ethnicity, value = n)
# Print the summary table
print(summary_table)# A tibble: 1 × 3
Ambiguous Arab Jewish
<int> <int> <int>
1 25 37 23
data_images_15<- data_participants |>
group_by(image, Participant_Private_ID) |>
#dplyr::filter(!(Participant_Private_ID %in% exclude_participant_IDs))|>
dplyr::summarize(
task_Jewish = mean(Response[display == "task_Jewish"], na.rm = TRUE),
task_Arab = mean(Response[display == "task_Arab"], na.rm = TRUE),
.groups = 'drop' ) |> # Calculate the difference in ratings for each participant and image
mutate(diff_per_participant = task_Jewish - task_Arab) |>
# Aggregate at the image level
group_by(image) |>
dplyr::summarize(
avg_diff = mean(diff_per_participant, na.rm = TRUE),
.groups = 'drop') |># Classify based on the average difference
mutate(
rated_ethnicity = case_when(
avg_diff < -15 ~ "Arab",
avg_diff > 15 ~ "Jewish",
TRUE ~ "Ambiguous"
)
)|>
mutate(avg_diff = abs(avg_diff)) |>
arrange(avg_diff)
data_images_big_diff_15 <- data_images_15 |>
dplyr::filter(abs(avg_diff) >= 15) |>
mutate(rated_ethnicity = case_when(avg_diff < 0 ~ "Arab",
avg_diff > 0 ~ "Jewish",
.default = NA))
data_images_choosen_15 <- data_images_15 |>
dplyr::filter(abs(avg_diff)<15)|>
mutate(rated_ethnicity = case_when(avg_diff < 0 ~ "Arab",
avg_diff > 0 ~ "Jewish",
.default = NA))
kable(data_images_15, caption = "Difference of means with a cutoff of 15 points")| image | avg_diff | rated_ethnicity |
|---|---|---|
| CFD_M-212-N.png | 0.0000000 | Ambiguous |
| CFD_M-242-N.png | 0.0217391 | Ambiguous |
| CFD_M-211-N.png | 0.0434783 | Ambiguous |
| CFD_M-234-N.png | 0.7173913 | Ambiguous |
| IFD_M-086-N.png | 0.8913043 | Ambiguous |
| CFD_M-227-N.png | 1.3260870 | Ambiguous |
| IFD_M-018-N.png | 1.3260870 | Ambiguous |
| CFD_M-214-N.png | 2.1304348 | Ambiguous |
| CFD_M-236-N.png | 2.2391304 | Ambiguous |
| IFD_M-105-N.png | 2.5869565 | Ambiguous |
| CFD_M-206-N.png | 3.6304348 | Ambiguous |
| CFD_M-218-N.png | 4.2391304 | Ambiguous |
| CFD_M-220-N.png | 4.3695652 | Ambiguous |
| CFD_M-253-N.png | 4.7391304 | Ambiguous |
| CFD_M-248-N.png | 5.1956522 | Ambiguous |
| IFD_M-421-N.png | 5.2826087 | Ambiguous |
| IFD_M-132-N.png | 5.6086957 | Ambiguous |
| IFD_M-419-N.png | 6.2608696 | Ambiguous |
| CFD_M-237-N.png | 6.4130435 | Ambiguous |
| IFD_M-135-N.png | 8.8043478 | Ambiguous |
| IFD_M-108-N.png | 9.0000000 | Ambiguous |
| CFD_M-216-N.png | 9.3695652 | Ambiguous |
| IFD_M-036-N.png | 9.4565217 | Ambiguous |
| IFD_M-416-N.png | 9.9130435 | Ambiguous |
| CFD_M-224-N.png | 9.9347826 | Ambiguous |
| CFD_M-243-N.png | 10.0217391 | Ambiguous |
| CFD_M-247-N.png | 10.7608696 | Ambiguous |
| IFD_M-117-N.png | 10.8043478 | Ambiguous |
| IFD_M-122-N.png | 11.0217391 | Ambiguous |
| CFD_M-229-N.png | 11.1086957 | Ambiguous |
| IFD_M-067-N.png | 11.1521739 | Ambiguous |
| CFD_M-231-N.png | 11.3478261 | Ambiguous |
| IFD_M-100-N.png | 11.3913043 | Ambiguous |
| CFD_M-225-N.png | 11.5217391 | Ambiguous |
| IFD_M-121-N.png | 11.9347826 | Ambiguous |
| CFD_M-204-N.png | 13.4130435 | Ambiguous |
| CFD_M-222-N.png | 14.3043478 | Ambiguous |
| IFD_M-418-N.png | 14.4130435 | Ambiguous |
| IFD_M-033-N.png | 14.5652174 | Ambiguous |
| IFD_M-424-N.png | 15.0000000 | Ambiguous |
| IFD_M-441-N.png | 15.4565217 | Jewish |
| CFD_M-251-N.png | 15.5652174 | Jewish |
| IFD_M-136-N.png | 15.6739130 | Arab |
| IFD_M-021-N.png | 15.8478261 | Jewish |
| CFD_M-221-N.png | 16.6956522 | Jewish |
| CFD_M-213-N.png | 17.2391304 | Arab |
| CFD_M-230-N.png | 17.5434783 | Arab |
| CFD_M-200-N.png | 17.9782609 | Jewish |
| IFD_M-015-N.png | 18.3260870 | Jewish |
| IFD_M-075-N.png | 19.5652174 | Arab |
| IFD_M-113-N.png | 19.5652174 | Jewish |
| IFD_M-062-N.png | 19.5869565 | Arab |
| IFD_M-420-N.png | 19.6086957 | Arab |
| CFD_M-246-N.png | 19.8478261 | Arab |
| CFD_M-223-N.png | 19.9130435 | Arab |
| IFD_M-044-N.png | 20.5217391 | Arab |
| IFD_M-087-N.png | 21.6304348 | Jewish |
| CFD_M-252-N.png | 22.3260870 | Arab |
| IFD_M-051-N.png | 23.0217391 | Arab |
| IFD_M-042-N.png | 23.1086957 | Arab |
| IFD_M-035-N.png | 23.4130435 | Jewish |
| CFD_M-239-N.png | 24.5000000 | Arab |
| CFD_M-238-N.png | 25.4565217 | Arab |
| CFD_M-210-N.png | 26.6739130 | Arab |
| IFD_M-017-N.png | 27.1956522 | Jewish |
| IFD_M-111-N.png | 28.5000000 | Arab |
| CFD_M-232-N.png | 28.6739130 | Arab |
| IFD_M-114-N.png | 29.5652174 | Jewish |
| IFD_M-032-N.png | 29.6304348 | Arab |
| IFD_M-097-N.png | 29.7826087 | Arab |
| IFD_M-084-N.png | 29.8478261 | Arab |
| IFD_M-020-N.png | 31.1521739 | Arab |
| CFD_M-250-N.png | 31.6304348 | Jewish |
| CFD_M-235-N.png | 32.0217391 | Arab |
| IFD_M-049-N.png | 33.4347826 | Arab |
| IFD_M-028-N.png | 36.5217391 | Jewish |
| CFD_M-201-N.png | 39.1086957 | Jewish |
| IFD_M-069-N.png | 41.6956522 | Arab |
| IFD_M-423-N.png | 44.1086957 | Arab |
| IFD_M-107-N.png | 49.4130435 | Arab |
| CFD_M-202-N.png | 50.2608696 | Arab |
| IFD_M-066-N.png | 60.5000000 | Arab |
| IFD_M-039-N.png | 61.4565217 | Arab |
| IFD_M-045-N.png | 61.7391304 | Arab |
| IFD_M-046-N.png | 70.9347826 | Arab |
saveRDS(data_images_15, file = "C:/Users/97252/Documents/GitHub/face_cater/PILOT- 1 face categorization/Data/final_data.rdx")Summary Table:
summary_table_15 <- data_images_15 |>
count(rated_ethnicity) |>
spread(key = rated_ethnicity, value = n)
# Print the summary table
print(summary_table_15)# A tibble: 1 × 3
Ambiguous Arab Jewish
<int> <int> <int>
1 40 31 14
data_images_20<- data_participants |>
group_by(image, Participant_Private_ID) |>
#dplyr::filter(!(Participant_Private_ID %in% exclude_participant_IDs))|>
dplyr::summarize(
task_Jewish = mean(Response[display == "task_Jewish"], na.rm = TRUE),
task_Arab = mean(Response[display == "task_Arab"], na.rm = TRUE),
.groups = 'drop' ) |> # Calculate the difference in ratings for each participant and image
mutate(diff_per_participant = task_Jewish - task_Arab) |>
# Aggregate at the image level
group_by(image) |>
dplyr::summarize(
avg_diff = mean(diff_per_participant, na.rm = TRUE),
.groups = 'drop') |># Classify based on the average difference
mutate(
rated_ethnicity = case_when(
avg_diff < -20 ~ "Arab",
avg_diff > 20 ~ "Jewish",
TRUE ~ "Ambiguous"
)
)|>
mutate(avg_diff = abs(avg_diff)) |>
arrange(avg_diff)
data_images_big_diff_20 <- data_images_20 |>
dplyr::filter(abs(avg_diff) >= 20) |>
mutate(rated_ethnicity = case_when(avg_diff < 0 ~ "Arab",
avg_diff > 0 ~ "Jewish",
.default = NA))
data_images_choosen_20 <- data_images_20 |>
dplyr::filter(abs(avg_diff)<20)|>
mutate(rated_ethnicity = case_when(avg_diff < 0 ~ "Arab",
avg_diff > 0 ~ "Jewish",
.default = NA))
kable(data_images_20, caption = "Difference of means with a cutoff of 20 points")| image | avg_diff | rated_ethnicity |
|---|---|---|
| CFD_M-212-N.png | 0.0000000 | Ambiguous |
| CFD_M-242-N.png | 0.0217391 | Ambiguous |
| CFD_M-211-N.png | 0.0434783 | Ambiguous |
| CFD_M-234-N.png | 0.7173913 | Ambiguous |
| IFD_M-086-N.png | 0.8913043 | Ambiguous |
| CFD_M-227-N.png | 1.3260870 | Ambiguous |
| IFD_M-018-N.png | 1.3260870 | Ambiguous |
| CFD_M-214-N.png | 2.1304348 | Ambiguous |
| CFD_M-236-N.png | 2.2391304 | Ambiguous |
| IFD_M-105-N.png | 2.5869565 | Ambiguous |
| CFD_M-206-N.png | 3.6304348 | Ambiguous |
| CFD_M-218-N.png | 4.2391304 | Ambiguous |
| CFD_M-220-N.png | 4.3695652 | Ambiguous |
| CFD_M-253-N.png | 4.7391304 | Ambiguous |
| CFD_M-248-N.png | 5.1956522 | Ambiguous |
| IFD_M-421-N.png | 5.2826087 | Ambiguous |
| IFD_M-132-N.png | 5.6086957 | Ambiguous |
| IFD_M-419-N.png | 6.2608696 | Ambiguous |
| CFD_M-237-N.png | 6.4130435 | Ambiguous |
| IFD_M-135-N.png | 8.8043478 | Ambiguous |
| IFD_M-108-N.png | 9.0000000 | Ambiguous |
| CFD_M-216-N.png | 9.3695652 | Ambiguous |
| IFD_M-036-N.png | 9.4565217 | Ambiguous |
| IFD_M-416-N.png | 9.9130435 | Ambiguous |
| CFD_M-224-N.png | 9.9347826 | Ambiguous |
| CFD_M-243-N.png | 10.0217391 | Ambiguous |
| CFD_M-247-N.png | 10.7608696 | Ambiguous |
| IFD_M-117-N.png | 10.8043478 | Ambiguous |
| IFD_M-122-N.png | 11.0217391 | Ambiguous |
| CFD_M-229-N.png | 11.1086957 | Ambiguous |
| IFD_M-067-N.png | 11.1521739 | Ambiguous |
| CFD_M-231-N.png | 11.3478261 | Ambiguous |
| IFD_M-100-N.png | 11.3913043 | Ambiguous |
| CFD_M-225-N.png | 11.5217391 | Ambiguous |
| IFD_M-121-N.png | 11.9347826 | Ambiguous |
| CFD_M-204-N.png | 13.4130435 | Ambiguous |
| CFD_M-222-N.png | 14.3043478 | Ambiguous |
| IFD_M-418-N.png | 14.4130435 | Ambiguous |
| IFD_M-033-N.png | 14.5652174 | Ambiguous |
| IFD_M-424-N.png | 15.0000000 | Ambiguous |
| IFD_M-441-N.png | 15.4565217 | Ambiguous |
| CFD_M-251-N.png | 15.5652174 | Ambiguous |
| IFD_M-136-N.png | 15.6739130 | Ambiguous |
| IFD_M-021-N.png | 15.8478261 | Ambiguous |
| CFD_M-221-N.png | 16.6956522 | Ambiguous |
| CFD_M-213-N.png | 17.2391304 | Ambiguous |
| CFD_M-230-N.png | 17.5434783 | Ambiguous |
| CFD_M-200-N.png | 17.9782609 | Ambiguous |
| IFD_M-015-N.png | 18.3260870 | Ambiguous |
| IFD_M-075-N.png | 19.5652174 | Ambiguous |
| IFD_M-113-N.png | 19.5652174 | Ambiguous |
| IFD_M-062-N.png | 19.5869565 | Ambiguous |
| IFD_M-420-N.png | 19.6086957 | Ambiguous |
| CFD_M-246-N.png | 19.8478261 | Ambiguous |
| CFD_M-223-N.png | 19.9130435 | Ambiguous |
| IFD_M-044-N.png | 20.5217391 | Arab |
| IFD_M-087-N.png | 21.6304348 | Jewish |
| CFD_M-252-N.png | 22.3260870 | Arab |
| IFD_M-051-N.png | 23.0217391 | Arab |
| IFD_M-042-N.png | 23.1086957 | Arab |
| IFD_M-035-N.png | 23.4130435 | Jewish |
| CFD_M-239-N.png | 24.5000000 | Arab |
| CFD_M-238-N.png | 25.4565217 | Arab |
| CFD_M-210-N.png | 26.6739130 | Arab |
| IFD_M-017-N.png | 27.1956522 | Jewish |
| IFD_M-111-N.png | 28.5000000 | Arab |
| CFD_M-232-N.png | 28.6739130 | Arab |
| IFD_M-114-N.png | 29.5652174 | Jewish |
| IFD_M-032-N.png | 29.6304348 | Arab |
| IFD_M-097-N.png | 29.7826087 | Arab |
| IFD_M-084-N.png | 29.8478261 | Arab |
| IFD_M-020-N.png | 31.1521739 | Arab |
| CFD_M-250-N.png | 31.6304348 | Jewish |
| CFD_M-235-N.png | 32.0217391 | Arab |
| IFD_M-049-N.png | 33.4347826 | Arab |
| IFD_M-028-N.png | 36.5217391 | Jewish |
| CFD_M-201-N.png | 39.1086957 | Jewish |
| IFD_M-069-N.png | 41.6956522 | Arab |
| IFD_M-423-N.png | 44.1086957 | Arab |
| IFD_M-107-N.png | 49.4130435 | Arab |
| CFD_M-202-N.png | 50.2608696 | Arab |
| IFD_M-066-N.png | 60.5000000 | Arab |
| IFD_M-039-N.png | 61.4565217 | Arab |
| IFD_M-045-N.png | 61.7391304 | Arab |
| IFD_M-046-N.png | 70.9347826 | Arab |
saveRDS(data_images_20, file = "C:/Users/97252/Documents/GitHub/face_cater/PILOT- 1 face categorization/Data/final_data.rdx")Summary Table:
summary_table_20 <- data_images_20 |>
count(rated_ethnicity) |>
spread(key = rated_ethnicity, value = n)
# Print the summary table
print(summary_table_20)# A tibble: 1 × 3
Ambiguous Arab Jewish
<int> <int> <int>
1 55 23 7
# Adjusting data_images to consider the effect of condition order
data_images_conditions <- data_participants |>
group_by(image, Participant_Private_ID) |>
#dplyr::filter(!(Participant_Private_ID %in% exclude_participant_IDs))|> # Uncomment if you need to exclude certain participants
summarize(
task_Jewish = mean(Response[display == "task_Jewish"], na.rm = TRUE),
task_Arab = mean(Response[display == "task_Arab"], na.rm = TRUE),
order_of_conditions = first(order_of_conditions), # Assuming each participant sees each image only once under each condition
.groups = 'drop'
) |>
mutate(diff_per_participant = task_Jewish - task_Arab) |>
group_by(image, order_of_conditions) |> # Group also by order_of_conditions to analyze this factor
summarize(
avg_diff = mean(diff_per_participant, na.rm = TRUE),
.groups = 'drop'
) |>
mutate(
rated_ethnicity = case_when(
avg_diff < -10 ~ "Arab",
avg_diff > 10 ~ "Jewish",
TRUE ~ "Ambiguous"
)
) |>
arrange(order_of_conditions, abs(avg_diff)) # Sort by order of conditions and then by the size of differencesVisualizing image ratings in both display forms.
plot_images <- data_participants |>
group_by(display, image) |>
mutate(per_condition_mean = mean(Response, na.rm = T)) |>
#filter(str_detect(image, pattern = "20")) |>
ggplot(aes(x = display, y = Response)) +
geom_point() +
geom_point(aes(y = per_condition_mean, color = "red"), show.legend = F) +
facet_wrap(~image, scales = "fixed") +
scale_y_continuous(labels = seq(0, 100, 10), breaks = seq(0, 100, 10)) +
theme(axis.text.x = element_text(angle = 90, vjust = .5, hjust = 1)) +
theme_classic()
ggsave("plot_images_with_all.png", plot = plot_images, path = "../Plots/", width = 4000, height = 4000, units = "px")data_participants <- data_participants |>
group_by(display, image, order_of_conditions) |>
mutate(per_condition_mean = mean(Response, na.rm = TRUE)) |>
ungroup()
plot_image_condition <- data_participants |>
mutate(order = case_when(display == "task_Arab" & order_of_conditions == "ArabFirst" ~ "First",
display == "task_Jewish" & order_of_conditions == "JewishFirst" ~ "First",
.default = "Second")) |>
ggplot(aes(y = Response, x = order, fill = display)) +
geom_violin(position = "dodge", color = "gray34") +
geom_point(aes(y = per_condition_mean), position = position_dodge(.9), color = "red", show.legend = F) +
facet_wrap(~image)
# Save the modified plot
ggsave("plot_images_order_condition_separated.png", plot = plot_image_condition,path = "../Plots/", width = 40, height = 40, units = "cm", bg = "white", limitsize = FALSE)# Ensure the data is grouped and then summarize
data_wide <- data_participants |>
group_by(image, display) |>
dplyr::summarize(mean_response = mean(Response, na.rm = TRUE), .groups = 'drop')
# Reshaping the data to wide format
data_wide <- data_wide |>
pivot_wider(names_from = display, values_from = mean_response)
# Calculating the difference
data_diff <- data_wide |>
mutate(diff = abs(`task_Arab` - `task_Jewish`), # Replace with your actual display column names
diff_less_than_10 = ifelse(diff < 10, "same", "no_same"))
# Merging the difference back into the original data
data_participants1 <- data_participants |>
left_join(data_diff, by = "image")
data_participants1 <- data_participants1 |>
group_by(image, display) |>
mutate(per_condition_mean = mean(Response, na.rm = TRUE)) |>
ungroup()
# Creating the plot
plot1 <- data_participants1 |>
ggplot(aes(x = display, y = Response, color = diff_less_than_10)) +
geom_point() +
geom_point(aes(y = per_condition_mean, color = "mean"), show.legend = F) +
facet_wrap(~image, scales = "fixed") +
scale_y_continuous(labels = seq(0, 100, 10), breaks = seq(0, 100, 10)) +
theme(axis.text.x = element_text(angle = 90, vjust = .5, hjust = 1)) +
theme_classic() +
scale_color_manual(values = c("same" = "#E84646", "no_same" = "black", "mean" = "#7B8FD4"))
# Display the plot
ggsave("plot1_with_all.png", plot = plot1, path = "../Plots/", width = 4000, height = 4000, units = "px")participant_conditions <- data_participants |>
distinct(Participant_Private_ID, order_of_conditions) |>
mutate(label = paste(Participant_Private_ID, ifelse(order_of_conditions == "JewishFirst", "Jewish first", "Arab first")))
plot_participants_condLabel <- data_participants |>
group_by(Participant_Private_ID, display) |>
mutate(participant_mean = mean(Response, na.rm = TRUE)) |>
ggplot(aes(x = display, y = Response)) +
geom_point() +
geom_point(aes(y = participant_mean, color = "red"), show.legend = FALSE) + # Highlighting participant mean
facet_wrap(~Participant_Private_ID, labeller = as_labeller(setNames(participant_conditions$label, participant_conditions$Participant_Private_ID))) +
scale_x_discrete(labels = c("Arab" = "Arab", "Jewish" = "Jewish")) +
scale_y_continuous(breaks = seq(0, 100, 10), labels = seq(0, 100, 10)) +
labs(x = "") +
theme_classic()
ggsave("plot_participants_with_all_label.png", plot = plot_participants_condLabel, path = "../Plots/", width = 4000, height = 4000, units = "px")Adding lines that connects between the ratings of each image:
plot_participants_with_line <- data_participants |>
ggplot(aes(x = display, y = Response)) +
geom_smooth(aes(x = display, y = Response, group = image), method = "lm", color = "gray84", se = F, inherit.aes = F) +
geom_smooth(aes(group = -1), method = "lm", se = F, color = "red") +
facet_wrap(~Participant_Private_ID) +
scale_y_continuous(limits = c(0, 100)) +
theme_classic()
ggsave("plot_participants_with_line_all.png", plot = plot_participants_with_line, path = "../Plots/", width = 4000, height = 4000, units = "px") ## Demographics
data_demo <- data_demo |>
colnames_to_underscores() |>
dplyr::filter(!(Question_Key %in% c("BEGIN QUESTIONNAIRE", "END QUESTIONNAIRE"))) |>
dplyr::filter(Event_Index != "END OF FILE") |>
select(Participant_Private_ID, Question_Key, Response) |>
pivot_wider(names_from = Question_Key, values_from = Response)demo_wide_clean <- data_demo |>
mutate(gender = case_when(`gender-quantised` == "1" ~ "man",
`gender-quantised` == "2" ~ "woman")) |>
select(-`gender-quantised`, -`gender-quantised`, -`gender-text`, -`ethnic-text`, -`religiosity-quantised`, -`scale_of_SES-quantised`, -`age-quantised`) |>
mutate(Participant_Private_ID = factor(Participant_Private_ID),
age = as.numeric(age),
children = as.numeric(children),
scale_of_SES = as.numeric(scale_of_SES))demo_wide_clean <- demo_wide_clean |>
rename(ethnic = `ethnic-1`, SES = scale_of_SES, comment = `response-7`, ethnic2 = `ethnic-4`, )ggplot(demo_wide_clean, aes(x = age)) +
geom_histogram(bins = 50) +
scale_x_continuous(breaks = seq(17, 71, 2)) +
theme_classic()Summary Table for AGE stat:
Age_stats <- get_summary_stats(as.data.frame(as.numeric(demo_wide_clean$age)))
if (!requireNamespace("knitr", quietly = TRUE)) {
install.packages("knitr")
}
library(knitr)
# Generate a nice table using kable
kable(Age_stats, caption = "Summary Statistics for Age", format = "markdown")| as.numeric(demo_wide_clean$age) | |
|---|---|
| Min. :18.00 | |
| 1st Qu.:23.00 | |
| Median :30.00 | |
| Mean :29.49 | |
| 3rd Qu.:33.00 | |
| Max. :72.00 | |
| NA’s :2 |
Age_stats_df <- data.frame(
Statistic = c("Mean", "Median", "SD", "Min", "Max"),
Value = c(mean(demo_wide_clean$age, na.rm = TRUE),
median(demo_wide_clean$age, na.rm = TRUE),
sd(demo_wide_clean$age, na.rm = TRUE),
min(demo_wide_clean$age, na.rm = TRUE),
max(demo_wide_clean$age, na.rm = TRUE))
)
kable(Age_stats_df, caption = "Summary Statistics for Age", format = "markdown")| Statistic | Value |
|---|---|
| Mean | 29.4898 |
| Median | 30.0000 |
| SD | 10.2533 |
| Min | 18.0000 |
| Max | 72.0000 |
ggplot(demo_wide_clean, aes(x = gender)) +
geom_histogram(stat = "count") +
scale_y_continuous(breaks = seq(0, 200, 10)) +
theme_classic()male_per <- sum(demo_wide_clean$gender == "man", na.rm = TRUE) /
sum(!is.na(demo_wide_clean$gender))
female_per <- sum(demo_wide_clean$gender == "woman", na.rm = T) /
sum(!is.na(demo_wide_clean$gender))
a_baniari_per <- sum(demo_wide_clean$gender == "לא בינארי", na.rm = T)/
sum(!is.na(demo_wide_clean$gender))ggplot(demo_wide_clean, aes(x = religiosity)) +
geom_histogram(stat = "count") +
scale_y_continuous(breaks = seq(0, 200, 10)) +
theme_classic()Summary Table for Religiosity stat:
religiosity_stats <- get_summary_stats(as.data.frame(as.numeric(demo_wide_clean$religiosity)))
Age_stats <- get_summary_stats(as.data.frame(as.numeric(demo_wide_clean$age)))
if (!requireNamespace("knitr", quietly = TRUE)) {
install.packages("knitr")
}
library(knitr)
str(demo_wide_clean)tibble [51 × 16] (S3: tbl_df/tbl/data.frame)
$ Participant_Private_ID: Factor w/ 51 levels "10514858","10515072",..: 1 2 3 4 5 6 7 8 9 10 ...
$ age : num [1:51] 19 72 22 19 27 22 19 26 27 29 ...
$ gender : chr [1:51] "man" "man" "man" "woman" ...
$ ethnic : chr [1:51] "ישראלי/ת" "ישראלי/ת" NA "ישראלי/ת" ...
$ ethnic2 : chr [1:51] "יהודי/ת" "יהודי/ת" NA "יהודי/ת" ...
$ ethnic-8 : chr [1:51] "חרד/ית" NA "חרד/ית" NA ...
$ religiosity : chr [1:51] "9" "3" "8" "1" ...
$ education : chr [1:51] "למדתי לימודים מתקדמים מעבר לתואר ראשון" "השלמתי תואר ראשון באוניברסיטה" "השלמתי בית ספר יסודי" "השלמתי בית ספר תיכון" ...
$ education-quantised : chr [1:51] "7" "6" "2" "4" ...
$ language : chr [1:51] "כן" "כן" "כן" "כן" ...
$ language -quantised : chr [1:51] "1" "1" "1" "1" ...
$ vision : chr [1:51] "ראייה מתוקנת (משקפיים/עדשות)" "ראייה מתוקנת (משקפיים/עדשות)" "ראייה מתוקנת (משקפיים/עדשות)" "כן" ...
$ vision-quantised : chr [1:51] "2" "2" "2" "1" ...
$ children : num [1:51] 1 2 1 0 0 0 0 0 0 3 ...
$ SES : num [1:51] 6 8 5 6 6 7 7 5 6 4 ...
$ comment : chr [1:51] NA NA NA NA ...
demo_wide_clean$religiosity <- as.numeric(as.character(demo_wide_clean$religiosity))
# Generate a nice table using kable
kable(religiosity_stats, caption = "Summary Statistics for religiosity", format = "markdown")| as.numeric(demo_wide_clean$religiosity) | |
|---|---|
| Min. : 1.000 | |
| 1st Qu.: 3.000 | |
| Median : 6.000 | |
| Mean : 5.588 | |
| 3rd Qu.: 8.000 | |
| Max. :10.000 |
relig_stats_df <- data.frame(
Statistic = c("Mean", "Median", "SD", "Min", "Max"),
Value = c(mean(demo_wide_clean$religiosity, na.rm = TRUE),
median(demo_wide_clean$religiosity, na.rm = TRUE),
sd(demo_wide_clean$religiosity, na.rm = TRUE),
min(demo_wide_clean$religiosity, na.rm = TRUE),
max(demo_wide_clean$religiosity, na.rm = TRUE))
)
kable(relig_stats_df, caption = "Summary Statistics for Religiosity", format = "markdown")| Statistic | Value |
|---|---|
| Mean | 5.588235 |
| Median | 6.000000 |
| SD | 3.093066 |
| Min | 1.000000 |
| Max | 10.000000 |
demo_table <- flextable::summarizor(demo_wide_clean[,-1], overall_label = "overall") |>
flextable::as_flextable(sep_w = 0, spread_first_col = T)education_levels <- c('1' = 'Part of Primary School', '2' = 'Finished Primary School', '3' = 'Part of High School', '4' = 'Finished High School', '5' = 'In Bachelor\'s Degree', '6' = 'Finished Bachelor\'s Degree', '7' = 'Master\'s Degree', '8' = 'Prefer not to answer')
demo_wide_clean$education <- factor(demo_wide_clean$`education-quantised`, levels = c('1', '2', '3', '4', '5', '6', '7', '8'), labels = c('Part of Primary School', 'Finished Primary School', 'Part of High School', 'Finished High School', 'In Bachelor\'s Degree', 'Finished Bachelor\'s Degree', 'Master\'s Degree', 'Prefer not to answer'))
ggplot(demo_wide_clean, aes(x = education)) +
geom_bar() +
scale_y_continuous(breaks = seq(0, 200, 10)) +
theme_classic() +
theme(axis.text.x = element_text(angle = 45, hjust = 1))ggplot(drop_na(demo_wide_clean, SES), aes(x = SES)) +
geom_histogram(stat = "count", binwidth = 1) +
stat_bin(binwidth = 1, geom = 'text', color = 'white', aes(label = after_stat(count)),
position = position_stack(vjust = 0.5)) +
scale_x_continuous(breaks = c(1:10)) +
scale_y_continuous(breaks = seq(0, 160, 10)) +
labs(title = "On a scale of 1-10 how would you rate your Social-Economic status?",
subtitle = "1 = Lowest status, 10 = Highest status",
y = "Number of participants",
x = "") +
theme_classic() +
theme(plot.title = element_text(family = "serif", hjust = 0.5, size = 16),
plot.subtitle = element_text(family = "serif", hjust = 0.5, size = 10))Summary Table for SES stat:
SES_stats <- get_summary_stats(as.data.frame(as.numeric(demo_wide_clean$SES)))
# Generate a nice table using kable
kable(SES_stats, caption = "Summary Statistics for SES", format = "markdown")| as.numeric(demo_wide_clean$SES) | |
|---|---|
| Min. :2.000 | |
| 1st Qu.:5.000 | |
| Median :6.000 | |
| Mean :5.824 | |
| 3rd Qu.:7.000 | |
| Max. :8.000 |
SES_stats_df <- data.frame(
Statistic = c("Mean", "Median", "SD", "Min", "Max"),
Value = c(mean(demo_wide_clean$SES, na.rm = TRUE),
median(demo_wide_clean$SES, na.rm = TRUE),
sd(demo_wide_clean$SES, na.rm = TRUE),
min(demo_wide_clean$SES, na.rm = TRUE),
max(demo_wide_clean$SES, na.rm = TRUE))
)
# Now generate the table with kable
kable(SES_stats_df, caption = "Summary Statistics for SES", format = "markdown")| Statistic | Value |
|---|---|
| Mean | 5.823529 |
| Median | 6.000000 |
| SD | 1.260252 |
| Min | 2.000000 |
| Max | 8.000000 |